/*
 * RandomSequenceGenerator.java
 *
 * Copyright 2003 Sergio Anibal de Carvalho Junior
 *
 * This file is part of NeoBio.
 *
 * NeoBio is free software; you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with NeoBio;
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Proper attribution of the author as the source of the software would be appreciated.
 *
 * Sergio Anibal de Carvalho Junior		mailto:sergioanibaljr@users.sourceforge.net
 * Department of Computer Science		http://www.dcs.kcl.ac.uk
 * King's College London, UK			http://www.kcl.ac.uk
 *
 * Please visit http://neobio.sourceforge.net
 *
 * This project was supervised by Professor Maxime Crochemore.
 *
 */

package neobio.textui;

import java.io.BufferedWriter;
import java.io.Writer;
import java.io.FileWriter;
import java.io.OutputStreamWriter;
import java.io.IOException;

/**
 * This class is a simple command line based utility for generating random sequences.
 *
 * <P>The main method takes up to three parameters from the command line to generate a
 * sequence: <CODE>type</CODE>, <CODE>size</CODE> and <CODE>file</CODE>, where:
 * <UL>
 * <LI><B><CODE>type</CODE></B> is either <CODE>DNA</CODE> for DNA sequences or
 * <CODE>PROT</CODE> for protein sequences (case is ignored).
 * <LI><B><CODE>size</CODE></B> is the number of characters (must be at least 1).
 * <LI><B><CODE>file</CODE></B> (optional) is the name of a file (if omitted, sequence
 * is written to standard output).
 * </UL>
 * </P>
 *
 * <P>The process exits with status 0 on success, 1 on invalid arguments and 2 on an
 * I/O failure.</P>
 *
 * @author Sergio A. de Carvalho Jr.
 */
public class RandomSequenceGenerator
{
	/**
	 * Character set for DNA sequences.
	 */
	private static final char DNA_CHARS[] = {'A', 'C', 'G', 'T'};

	/**
	 * Character set for protein sequences.
	 */
	private static final char PROT_CHARS[] = {'A','R','N','D','C','Q','E','G','H','I',
								'L','K','M','F','P','S','T','W','Y','V','B','Z','X'};

	/**
	 * The main method takes up to three parameters from the command line to generate a
	 * sequence. See the class description for details.
	 *
	 * @param args command line arguments: <CODE>type</CODE>, <CODE>size</CODE> and,
	 * optionally, <CODE>file</CODE>
	 */
	public static void main (String args[])
	{
		Writer	output;
		String	seq_type, filename;
		int		size;
		char	charset[];

		// both the sequence type and the size are required
		if (args.length < 2)
		{
			usage();
			System.exit(1);
			return;
		}

		// get 1st argument (required): sequence type
		seq_type = args[0];

		try
		{
			// get 2nd argument (required): number of characters
			size = Integer.parseInt(args[1]);
		}
		catch (NumberFormatException e)
		{
			usage();
			System.exit(1);
			return;
		}

		// validate character set
		if (seq_type.equalsIgnoreCase("DNA"))
			charset = DNA_CHARS;
		else if (seq_type.equalsIgnoreCase("PROT"))
			charset = PROT_CHARS;
		else
		{
			// no such option
			usage();
			System.exit(1);
			return;
		}

		// validate size
		if (size < 1)
		{
			System.err.println ("Error: size must be greater than 0.");
			System.exit(1);
			return;
		}

		// get 3rd argument (optional): file name
		filename = (args.length > 2) ? args[2] : null;

		if (filename != null)
		{
			try
			{
				// open file for writing
				output = new BufferedWriter (new FileWriter (filename));
			}
			catch (IOException e)
			{
				System.err.println ("Error: couldn't open " + filename + " for writing.");
				e.printStackTrace();
				System.exit(2);
				return;
			}
		}
		else
		{
			// file name was omitted, use standard output (buffered, since the
			// sequence is written one character at a time)
			output = new BufferedWriter (new OutputStreamWriter (System.out));
		}

		try
		{
			try
			{
				writeSequence (output, charset, size);
				output.flush();
			}
			finally
			{
				// release the file even if writing failed; standard output is
				// flushed above but never closed
				if (filename != null) output.close();
			}
		}
		catch (IOException e)
		{
			System.err.println ("Error: failed to write sequence.");
			e.printStackTrace();
			System.exit(2);
			return;
		}

		System.exit(0);
	}

	/**
	 * Writes <CODE>size</CODE> characters, each chosen at random from
	 * <CODE>charset</CODE>, to the given writer. The writer is neither flushed nor
	 * closed.
	 *
	 * @param output writer that receives the sequence
	 * @param charset characters to choose from
	 * @param size number of characters to write
	 * @throws IOException if the writer fails
	 */
	private static void writeSequence (Writer output, char charset[], int size)
		throws IOException
	{
		for (int i = 0; i < size; i++)
		{
			// Math.random() is in [0,1), so the index is always within the charset
			int index = (int) (Math.random() * charset.length);

			output.write(charset[index]);
		}
	}

	/**
	 * Prints command line usage to standard error.
	 */
	private static void usage ()
	{
		System.err.println(
		"\nUsage: RandomSequenceGenerator <type> <size> [<file>]\n\n" +
		"where:\n\n" +
		"   <type> = DNA for nucleotide sequences\n" +
		"            or PROT for protein sequences\n\n" +
		"   <size> = number os characters\n\n" +
		"   <file> = name of a file to where the sequence is to be written\n" +
		"            (if ommited, sequence is written to standard output)"
		);
	}
}